条形图(barplot)-基因邻接点节点数目
数据导入
数据处理
开始绘图
#绘制柱状图
pdf(file=paste(dir,"01barplotStat.pdf",sep=""),width=6,height=8)
par(mar=c(5,7,2,3),xpd=TRUE) #定义边界位置
bar=barplot(n,horiz=TRUE,col="skyblue",names=FALSE,
xlim=c(0,ceiling(max(n)/5)*5), #ceiling返回大于x的最小整数
xlab="Number of adjacent nodes") #设置x轴名称
text(x=n*0.95,y=bar,n) #添加每个基因的邻接点节点数目
text(x=-0.2,y=bar,label=names(n), #添加基因名称
xpd=TRUE,
pos=2) #1,2,3,4:below,left,above,right
dev.off()
#图例绘在制图区外,必须设置参数xpd=TRUE
#否则命令正确也不会出图,因为默认xpd=FALSE条形图(barplot)-基因显著性条形图
数据导入
数据处理
开始绘图
library(ggplot2)
p=ggplot(data=rt)+
geom_bar(aes(x=Term, y=Count, fill=FDR),stat='identity')+
coord_flip() + #调整条形的方向
scale_fill_gradient(low="red", high = "blue")+ #连续值颜色填充
xlab("Term") +
ylab("Gene count") +
theme(axis.text.x=element_text(color="black",size=10),
axis.text.y=element_text(color="black", size=10)) +
scale_y_continuous(expand=c(0,0)) + #y轴是连续-注意coord_flip调整了方向
scale_x_discrete(expand=c(0,0))+ #x轴是分类-注意expand=c(0,0)设置原点
theme_bw()
ggsave(file=paste(dir,"02barplotPval.pdf",sep=""),width=8,height=6)
#mapping 数据映射,更改映射默认值时使用
#data 数据集,更改映射数据集时使用
#stat 统计变换,这个参数使用频率相对较高
#stat 默认值为bin的情况下,是对数据进行计数
#stat 改成identity此时的条形图的长短表示各分类对y值求和
#position 位置调整,用于调整图层中重叠的点
#width 用于调整条形的宽度条形图(barplot)-堆积百分比条形图
数据导入
开始绘图
pdf(file=paste(dir,"03barplotPerent.pdf",sep=""),width=20,height=10)
par(las=1,mar=c(8,5,4,16),mgp=c(3,0.1,0),cex.axis=1.5)
#las=1:always horizontal
#mgp:The margin line
#mar:A numerical vector of the form c(bottom, left, top, right)
a1=barplot(rt,col=col,yaxt="n",ylab="Relative Percent",xaxt="n",cex.lab=1.8)
a2=axis(2,tick=F,labels=F)
axis(2,a2,paste0(a2*100,"%"))
axis(1,a1,labels=F)
par(srt=60,xpd=T);text(a1,-0.02,colnames(rt),adj=1,cex=0.6);par(srt=0)
ytick2 = cumsum(rt[,ncol(rt)]) #累加
ytick1 = c(0,ytick2[-length(ytick2)])
legend(par('usr')[2]*0.98,par('usr')[4],legend=rownames(rt),col=col,pch=15,bty="n",cex=1.3)
dev.off()barplotGroup
数据导入
开始绘图
library(ggpubr)
pdf(file=paste(dir,"04barplotGroup.pdf",sep=""),width=8,height=6)
ggbarplot(rt, x="Term", y="Count", fill = "ONTOLOGY", color = "white",
orientation = "horiz", #横向显示
palette = "aaas", #配色方案
legend = "right", #图例位置
sort.val = "asc", #上升排序,区别于desc
sort.by.groups=TRUE)+ #按组排序
scale_y_continuous(expand=c(0, 0)) + scale_x_discrete(expand=c(0,0))
dev.off()boxplotSort
数据导入
开始绘图
#定义输出图片的排序方式
library(plyr)
library(ggpubr)
med=ddply(rt,"Type",summarise,med=median(expression))
rt$Type=factor(rt$Type, levels=med[order(med[,"med"],decreasing = T),"Type"])
#绘制
col=rainbow(length(levels(factor(rt$Type))))
p=ggboxplot(rt, x="Type", y="expression", color = "Type",
palette = col,
ylab=y,
xlab=x,
#add = "jitter", #绘制每个样品的散点
legend = "right")
pdf(file=paste(dir,"05boxplotSort.pdf",sep=""), width=10, height=6)
p+rotate_x_text(60) #倾斜角度
dev.off()boxplotDiff
数据导入
开始绘图
library(ggpubr)
#设置比较租
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}
#绘制boxplot
boxplot=ggboxplot(rt, x="Type", y="Expression", color="Type",
xlab=x,
ylab=y,
legend.title=x,
palette = c("blue","red"),
add = "jitter")+
stat_compare_means(comparisons = my_comparisons)
#输出图片
pdf(file=paste(dir,"06boxplotDiff.pdf",sep=""),width=5,height=4.5)
print(boxplot)
dev.off()boxplotMulti
数据导入
数据处理
x=colnames(rt)[1]
colnames(rt)[1]="Type"
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")library(reshape2)
library(ggpubr)
#绘制boxplot
p=ggboxplot(data, x="Gene", y="Expression", color = "Type",
ylab="Gene expression",
xlab="",
legend.title=x,
palette = c("blue","red"),
width=0.6, add = "none")
p=p+rotate_x_text(60)
p1=p+stat_compare_means(aes(group=Type),
method="wilcox.test",
symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", " ")),
label = "p.signif")
#输出图片
pdf(file=paste(dir,"07boxplotMulti.pdf",sep=""),width=8,height=6)
print(p1)
dev.off()boxplotClinical
数据导入
开始绘图
library(ggpubr)
#设置比较租
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}
#绘制boxplot
boxplot=ggboxplot(rt, x="Type", y="Expression", color="Type",
xlab=x,
ylab=y,
legend.title=x,
add = "jitter")+
stat_compare_means(comparisons = my_comparisons)
#输出图片
pdf(file=paste(dir,"08boxplotClinical.pdf",sep=""), width=5.5, height=5)
print(boxplot)
dev.off()boxplotFacet
数据导入
数据处理
x=colnames(rt)[1]
colnames(rt)[1]="Type"
#差异分析
geneSig=c("")
for(gene in colnames(rt)[2:ncol(rt)]){
rt1=rt[,c(gene,"Type")]
colnames(rt1)=c("expression","Type")
p=1
if(length(levels(factor(rt1$Type)))>2){
test=kruskal.test(expression ~ Type, data = rt1)
p=test$p.value
}else{
test=wilcox.test(expression ~ Type, data = rt1)
p=test$p.value
}
Sig=ifelse(p<0.001,"***",ifelse(p<0.01,"**",ifelse(p<0.05,"*","")))
geneSig=c(geneSig,Sig)
}
colnames(rt)=paste0(colnames(rt),geneSig)
#把数据转换成ggplot2输入文件
library(reshape2)
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")开始绘制
#绘制
library(ggplot2)
p1=ggplot(data,aes(x=Type,y=Expression,fill=Type))+
guides(fill=guide_legend(title=x))+
labs(x = x, y = "Gene expression")+
geom_boxplot()+ facet_wrap(~Gene,nrow =1)+ theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#输出
pdf(file=paste(dir,"09boxplotFacet.pdf",sep=""), width=9, height=5)
print(p1)
dev.off()vioplot
数据导入
开始绘图
#设置比较组
group=levels(factor(rt$Type))
rt$Type=factor(rt$Type, levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]}
#绘制
library(ggpubr)
pdf(file=paste(dir,"10vioplot-1.pdf",sep=""), width=6, height=5)
ggviolin(rt, x="Type", y="Expression", fill = "Type",
xlab=x, ylab=y,
legend.title=x,
add = "boxplot", add.params = list(fill="white"))+
stat_compare_means(comparisons = my_comparisons)
dev.off()
pdf(file=paste(dir,"10vioplot-2.pdf",sep=""), width=6, height=5)
ggviolin(rt, x="Type", y="Expression", fill = "Type",
xlab=x, ylab=y,
legend.title=x,
add = "boxplot", add.params = list(fill="white"))+
stat_compare_means(comparisons = my_comparisons,
symnum.args=list(cutpoints = c(0,0.001,0.01,0.05,1),
symbols = c("***","**","*","ns")),
label = "p.signif")
dev.off()vioplotMulti
数据导入
数据处理
开始绘图
#绘制小提琴图
library(ggpubr)
p=ggviolin(data, x="Gene", y="Expression", color = "Type",
ylab="Gene expression",
xlab=x,
legend.title=x,
add.params = list(fill="white"),
palette = c("blue","red"),
width=1, add = "boxplot")
p=p+rotate_x_text(60)
p1=p+stat_compare_means(aes(group=Type),
method="wilcox.test",
symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = c("***", "**", "*", " ")),
label = "p.signif")
#输出
pdf(file=paste(dir,"11vioplotMulti.pdf",sep=""), width=6, height=5)
print(p1)
dev.off()vioplotFacet
数据导入
数据处理
x=colnames(rt)[1]
colnames(rt)[1]="Type"
#差异分析
geneSig=c("")
for(gene in colnames(rt)[2:ncol(rt)]){
rt1=rt[,c(gene,"Type")]
colnames(rt1)=c("expression","Type")
p=1
if(length(levels(factor(rt1$Type)))>2){
test=kruskal.test(expression ~ Type, data = rt1)
p=test$p.value
}else{
test=wilcox.test(expression ~ Type, data = rt1)
p=test$p.value
}
Sig=ifelse(p<0.001,"***",ifelse(p<0.01,"**",ifelse(p<0.05,"*","")))
geneSig=c(geneSig,Sig)
}
colnames(rt)=paste0(colnames(rt),geneSig)
#把数据转换成ggplot2文件
library(reshape2)
data=melt(rt,id.vars=c("Type"))
colnames(data)=c("Type","Gene","Expression")开始绘图
#绘制
p1=ggplot(data,aes(x=Type,y=Expression,fill=Type))+
guides(fill=guide_legend(title=x))+
labs(x = x, y = "Gene expression")+
geom_violin()+ geom_boxplot(width=0.2,position=position_dodge(0.9))+
facet_wrap(~Gene,nrow =1)+ theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#出图
pdf(file=paste(dir,"12vioplotFacet.pdf",sep=""), width=9, height=5)
print(p1)
dev.off()pairDiff
数据导入
开始绘图
#绘制
library("ggpubr")
pdf(file=paste(dir,"13pairDiff-1.pdf",sep=""), width=5, height=4)
ggpaired(data, cond1 = cond1, cond2 = cond2, fill = "condition",
palette = "jco",
legend.title="Condition",xlab="",ylab = ylab)+
stat_compare_means(paired = TRUE, label = "p.format", label.x = 1.35)
dev.off()
pdf(file=paste(dir,"13pairDiff-2.pdf",sep=""), width=5, height=4)
ggpaired(data, cond1 = cond1, cond2 = cond2, fill = "condition",
palette = "jco",
legend.title="Condition",xlab="",ylab = ylab)+
stat_compare_means(paired = TRUE,
symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = c("***", "**", "*", "ns")),
label = "p.signif",label.x = 1.35)
dev.off()ggballoonplot
数据导入
15Deviation
数据导入
数据处理
开始绘图
library(ggpubr)
pdf(file=paste(dir,"15Deviation.pdf",sep=""),width=6,height=5)
ggbarplot(rt, x="Name", y="Value", fill = "Regulate", color = "white",
palette = "jco",
sort.val = "desc", sort.by.groups = FALSE,
x.text.angle=90,
xlab = x, ylab = y,
legend.title="Regulate", rotate=TRUE, ggtheme = theme_minimal())
#rotatex设置x/y对调
dev.off()heatmap
数据导入
开始绘制
cliHeatmap
数据导入
数据处理
vol
数据导入
数据处理
开始绘图
#绘制火山图
library(ggplot2)
p = ggplot(rt, aes(logFC, -log10(fdr)))+
geom_point(aes(col=Significant))+
scale_color_manual(values=c("green", "black", "red"))+
labs(title = " ")+
theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"))
p=p+theme_bw()
#出图
pdf(file=paste(dir,"18vol.pdf",sep=""),width=5.5,height=4.5)
print(p)
dev.off()venn
数据导入
#设置路径
path <-paste(dir,"19venn/",sep="")
#设置工作目录
setwd(path)
#读取数据
outFile="intersectGenes.txt" #输出交集基因文件
files=dir() #获取目录下所有文件
files=grep("txt$",files,value=T) #提取TXT结尾的文件
geneList=list()
#读取所有txt文件中的基因信息,保存到GENELIST
for(i in 1:length(files)){
inputFile=files[i]
if(inputFile==outFile){next}
rt=read.table(inputFile,header=F) #读取
geneNames=as.vector(rt[,1]) #提取基因名
geneNames=gsub("^ | $","",geneNames) #去掉基因首尾的空格
uniqGene=unique(geneNames) #基因取unique
header=unlist(strsplit(inputFile,"\\.|\\-"))
geneList[[header[1]]]=uniqGene
uniqLength=length(uniqGene)
print(paste(header[1],uniqLength,sep=" "))
}开始绘图
#绘制venn
library(VennDiagram)
venn.plot=venn.diagram(geneList,filename=NULL,fill=rainbow(length(geneList)) )
pdf(file=paste(dir,"19venn.pdf",sep=""),width=5, height=5)
grid.draw(venn.plot)
dev.off()
#保存交集基因
intersectGenes=Reduce(intersect,geneList)
write.table(file=paste(dir,"intersectGenes.txt",sep=""),
intersectGenes,sep="\t",quote=F,col.names=F,row.names=F)UpSetR
数据导入
#设置路径
path <-paste(dir,"20UpSetR/",sep="")
#设置工作目录
setwd(path)
#读取数据
library(UpSetR)
files=dir() #获取目录下所有文件
files=grep("txt$",files,value=T) #提取.txt结尾的文件
geneList=list()
#获取所有txt文件中的基因信息,保存到geneList
for(i in 1:length(files)){
inputFile=files[i]
if(inputFile==outFile){next}
rt=read.table(inputFile,header=F) #读取输入文件
geneNames=as.vector(rt[,1]) #提取基因名称
geneNames=gsub("^ | $","",geneNames) #去掉基因首尾的空格
uniqGene=unique(geneNames) #基因取unique,唯一基因列表
header=unlist(strsplit(inputFile,"\\.|\\-"))
geneList[[header[1]]]=uniqGene
uniqLength=length(uniqGene)
print(paste(header[1],uniqLength,sep=" "))
}开始绘图
#绘制UpSet
upsetData=fromList(geneList)
pdf(file=paste(dir,"20UpSetR.pdf",sep=""),onefile = FALSE,width=9,height=6)
upset(upsetData,
nsets = length(geneList), #展示多少个数据
nintersects = 50, #展示基因集数目
order.by = "freq", #按照数目排序
show.numbers = "yes", #柱状图上方是否显示数值
number.angles = 20, #字体角度
point.size = 2, #点的大小
matrix.color="red", #交集点颜色
line.size = 0.8, #线条粗细
mainbar.y.label = "Gene Intersections",
sets.x.label = "Set Size")
dev.off()
#保存交集文件
intersectGenes=Reduce(intersect,geneList)
write.table(file=paste(dir,"intersectGenes.txt",sep=""),
intersectGenes,sep="\t",quote=F,col.names=F,row.names=F)cor
数据导入
数据处理
开始绘图
library(ggplot2)
library(ggpubr)
library(ggExtra)
#相关性分析
df1=as.data.frame(cbind(x,y))
corT=cor.test(x,y,method="spearman")
cor=corT$estimate
pValue=corT$p.value
p1=ggplot(df1, aes(x, y)) +
xlab(gene1)+ylab(gene2)+
geom_point()+ geom_smooth(method="lm",formula = y ~ x) + theme_bw()+
stat_cor(method = 'spearman', aes(x =x, y =y))
p2=ggMarginal(p1, type = "density", xparams = list(fill = "orange"),yparams = list(fill = "blue"))
#出图1
pdf(file=paste(dir,"21cor-1.pdf",sep=""),width=5,height=4.8)
print(p1)
dev.off()
#出图2
pdf(file=paste(dir,"21cor-2.pdf",sep=""),width=5,height=5)
print(p2)
dev.off()corrplot
数据导入
corCircos
数据导入
数据处理
开始绘制
library(corrplot)
library(circlize)
#绘制圈图
pdf(file=paste(dir,"23corCircos.pdf",sep=""),width=7,height=7)
par(mar=c(2,2,2,4))
circos.par(gap.degree=c(3,rep(2, nrow(cor1)-1)), start.degree = 180)
chordDiagram(cor1, grid.col=rainbow(ncol(rt)),
col=col1,transparency = 0.5,symmetric = T)
par(xpd=T)
colorlegend(col, vertical = T,labels=c(1,0,-1),
xlim=c(1.1,1.3),ylim=c(-0.4,0.4))
dev.off()
circos.clear()corNetwork
数据导入
数据处理
cordata=cor(t(data))
cutoff=0.4 #相关性阈值
#保留相关性矩阵的一半
mydata = cordata
upper = upper.tri(mydata)
mydata[upper] = NA
#把相关性矩阵转换为数据框
df = data.frame(gene=rownames(mydata),mydata)
library(reshape2)
dfmeltdata = melt(df,id="gene")
dfmeltdata = dfmeltdata[!is.na(dfmeltdata$value),]
dfmeltdata = dfmeltdata[dfmeltdata$gene!=dfmeltdata$variable,]
dfmeltdata = dfmeltdata[abs(dfmeltdata$value)>cutoff,]进一步定义
library(igraph)
#定义网络图的节点和边
corweight = dfmeltdata$value
weight = corweight+abs(min(corweight))+5
d = data.frame(p1=dfmeltdata$gene,p2=dfmeltdata$variable,weight=dfmeltdata$value)
g = graph.data.frame(dfmeltdata,directed = FALSE)
#设置颜色,节点大小,字体大小
E(g)$color = ifelse(corweight>0,rgb(254/255,67/255,101/255,abs(corweight)),rgb(0/255,0/255,255/255,abs(corweight)))
V(g)$size = 8
V(g)$shape = "circle"
V(g)$lable.cex = 1.2
V(g)$color = "white"
E(g)$weight = weight开始绘图
#可视化
pdf(file=paste(dir,"24corNetwork.pdf",sep=""),width=7,height=6)
layout(matrix(c(1,1,1,0,2,0),byrow=T,nc=3),height=c(6,1),width=c(3,4,3))
par(mar=c(1.5,2,2,2))
vertex.frame.color = NA
plot(g,layout=layout_nicely,
vertex.label.cex=V(g)$lable.cex,
edge.width = E(g)$weight,edge.arrow.size=0,
vertex.label.color="black",
vertex.frame.color=vertex.frame.color,
edge.color=E(g)$color,
vertex.label.cex=V(g)$lable.cex,vertex.label.font=2,
vertex.size=V(g)$size,edge.curved=0.4)
#绘制图例
color_legend = c(rgb(254/255,67/255,101/255,seq(1,0,by=-0.01)),
rgb(0/255,0/255,255/255,seq(0,1,by=0.01)))
par(mar=c(2,2,1,2),xpd = T,cex.axis=1.6,las=1)
barplot(rep(1,length(color_legend)),border = NA,
space = 0,ylab="",xlab="",
xlim=c(1,length(color_legend)),
horiz=FALSE,
axes = F,
col=color_legend,main="")
axis(3,at=seq(1,length(color_legend),length=5),
c(1,0.5,0,-0.5,-1),
tick=FALSE)
dev.off()radar
数据导入
数据处理
开始绘图
library(fmsb)
pdf(file=paste(dir,"25radar.pdf",sep=""),height=7,width=7)
radarchart( data, axistype=1,
pcol=col, #线条颜色
plwd=2 , #线条粗细
plty=1, #虚线,实线
cglcol="grey", #背景线条颜色
cglty=1, #背景线条虚线,实线1
caxislabels=seq(-maxValue,maxValue,maxValue/2), #坐标刻度
cglwd=1.2, #背景线条粗细
axislabcol="blue", #刻度颜色
vlcex=0.8 #字体大小
)
dev.off()ggalluvial
数据导入
开始绘图
library(dplyr)
library(ggplot2)
pdf(file=paste(dir,"26ggalluvial.pdf",sep=""), width=7,height=6)
mycol <- rep(c("#029149","#6E568C","#E0367A","#D8D155","#223D6C","#D20A13",
"#431A3D","#91612D","#FFD121","#088247","#11AA4D","#58CDD9",
"#7A142C","#5D90BA","#64495D","#7CC767"),15)
ggplot(corLodes, aes(x = x,
stratum = stratum,
alluvium = Cohort,
fill = stratum,
label = stratum)) +
scale_x_discrete(expand =c(0,0)) +
#用aes.flow控制线调颜色,forward说明颜色和前面一致,backward说明与后面一致
geom_flow(width = 2/10,aes.flow = "forward") +
geom_stratum(alpha = .9,width = 2/10) +
scale_fill_manual(values = mycol) +
#size = 2代表基因名字大小
geom_text(stat = "stratum",
size = 2,
color="black") +
xlab("") +
ylab("") +
theme_bw() +
theme(axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank()) +
theme(panel.grid =element_blank()) +
theme(panel.border = element_blank()) +
ggtitle("") + guides(fill = FALSE)
#dev.off()pie
数据导入
数据处理
bubble
数据导入
开始绘制
library(ggplot2)
p = ggplot(rt,aes(Ratio, Term)) +
geom_point(aes(size=Count, color=FDR))
p1 = p +
scale_colour_gradient(low="red",high="blue") +
labs(color="FDR",size="Count",x="Gene ratio",y="Term")+
theme(axis.text.x=element_text(color="black", size=10),axis.text.y=element_text(color="black", size=10)) +
theme_bw()
ggsave(file=paste(dir,"28bubble.pdf",sep=""), width=7, height=5) #保存Lollipop
数据导入
开始绘图
#绘制棒棒糖图
library(ggpubr)
pdf(file=paste(dir,"29Lollipop.pdf",sep=""),width=7,height=6)
ggdotchart(rt, x="Term", y="Count", color = "ONTOLOGY",group = "ONTOLOGY",
palette = "aaas", #配色方案
legend = "right", #图例位置
sorting = "descending", #上升排序,区别于desc
add = "segments", #增加线段
rotate = TRUE, #横向显示
dot.size = 5, #圆圈大小
label = round(rt$Count), #圆圈内数值
font.label = list(color="white",size=9, vjust=0.5), #圆圈内数值字体设置
ggtheme = theme_pubr())
dev.off()GOcircos
数据导入
加载R包
#install.packages("colorspace")
#install.packages("stringi")
#install.packages("ggplot2")
#install.packages("digest")
#install.packages("GOplot")
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("org.Hs.eg.db")
#BiocManager::install("DOSE")
#BiocManager::install("clusterProfiler")
#BiocManager::install("enrichplot")
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)
library(GOplot)数据处理-过滤
GO
#GO
GO=enrichGO(gene = gene,
OrgDb = org.Hs.eg.db,
pvalueCutoff =1,
qvalueCutoff = 1,
ont="all",
readable =T)
GO=as.data.frame(GO)
GO=GO[(GO$pvalue<pFilter & GO$p.adjust<adjPfilter),]
write.table(GO,file=paste(path,"GO.txt",sep=""),sep="\t",quote=F,row.names = F)go=data.frame(Category = GO$ONTOLOGY,ID = GO$ID,Term = GO$Description,
Genes = gsub("/", ", ", GO$geneID), adj_pval = GO$p.adjust)
#logFC
genelist=data.frame(ID = rt$gene, logFC = rt$logFC)
row.names(genelist)=genelist[,1]
circ <- circle_dat(go, genelist)
termNum = 5 #限定GO数目
termNum=ifelse(nrow(go)<termNum,nrow(go),termNum)
geneNum = nrow(genelist) #限定基因数目开始绘图
chord <- chord_dat(circ, genelist[1:geneNum,], go$Term[1:termNum])
#圈图1
pdf(file=paste(dir,"30GOcircos-1.pdf",sep=""),width = 10,height = 10.2)
GOChord(chord,
space = 0.001, #基因之间的间距
gene.order = 'logFC', #排序基因
gene.space = 0.25, #基因离圆圈距离
gene.size = 3, #
border.size = 0.1,
process.label = 7) #GO名称大小
dev.off()
#圈图2
pdf(file=paste(dir,"30GOcircos-2.pdf",sep=""),width = 12,height = 9)
GOCluster(circ, as.character(go[1:termNum,3]))
dev.off()KEGGcircos
数据导入
加载R包
#install.packages("colorspace")
#install.packages("stringi")
#install.packages("ggplot2")
#install.packages("digest")
#install.packages("GOplot")
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("org.Hs.eg.db")
#BiocManager::install("DOSE")
#BiocManager::install("clusterProfiler")
#BiocManager::install("enrichplot")
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)
library(GOplot)数据处理-过滤
KEGG富集分析
kk=enrichKEGG(gene = gene, organism = "hsa", pvalueCutoff =1, qvalueCutoff =1)
KEGG=as.data.frame(kk)
KEGG=KEGG[(KEGG$pvalue<pFilter & KEGG$p.adjust<adjPfilter),]
KEGG$geneID=as.character(sapply(KEGG$geneID,function(x)
paste(rt$gene[match(strsplit(x,"/")[[1]],as.character(rt$entrezID))],
collapse="/")))
write.table(KEGG,file=paste(path,"KEGG.txt",sep=""),
sep="\t",quote=F,row.names = F) #保存富集结果获取KEGG信息
kegg=data.frame(Category = "ALL",ID = KEGG$ID,
Term = KEGG$Description,
Genes = gsub("/", ", ", KEGG$geneID),
adj_pval = KEGG$p.adjust)
#读取基因的logFC
genelist <- data.frame(ID = rt$gene, logFC = rt$logFC)
row.names(genelist)=genelist[,1]
circ <- circle_dat(kegg, genelist)
termNum = 5 #限定Term数
termNum=ifelse(nrow(kegg)<termNum,nrow(kegg),termNum)
geneNum = nrow(genelist) #限定基因数目开始绘图
chord <- chord_dat(circ, genelist[1:geneNum,], kegg$Term[1:termNum])
pdf(file=paste(dir,"31KEGGcircos-1.pdf",sep=""),width = 11,height = 11.2)
GOChord(chord,
space = 0.001, #基因之间的间距
gene.order = 'logFC', #按照logFC对基因排序
gene.space = 0.25, #基因名跟圆圈的相对距离
gene.size = 4, #基因名字体大小
border.size = 0.1, #线条粗细
process.label = 7) #term字体大小
dev.off()multiGSEA
数据导入
开始绘图
gseaCol=c("#58CDD9","#7A142C","#5D90BA","#431A3D","#91612D","#6E568C",
"#E0367A","#D8D155","#64495D","#7CC767","#223D6C","#D20A13",
"#FFD121","#088247","#11AA4D")
library(ggplot2)
library(grid)
library(gridExtra)
#富集图
pGsea=ggplot(dataSet,aes(x=RANK.IN.GENE.LIST,y=RUNNING.ES,colour=pathway,group=pathway))+
geom_line(size = 1.5) +
scale_color_manual(values = gseaCol[1:nrow(dataSet)]) +
labs(x = "", y = "Enrichment Score", title = "") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0),
limits = c(min(dataSet$RUNNING.ES - 0.02),
max(dataSet$RUNNING.ES + 0.02))) +
theme_bw() + theme(panel.grid = element_blank()) +
theme(panel.border = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
geom_hline(yintercept = 0) +
guides(colour = guide_legend(title = NULL)) +
theme(legend.background = element_blank()) +
theme(legend.key = element_blank())+theme(legend.key.size=unit(0.5,'cm'))
#基因排序图
pGene=ggplot(dataSet,aes(RANK.IN.GENE.LIST,pathway,colour=pathway))+
geom_tile()+
scale_color_manual(values = gseaCol[1:nrow(dataSet)]) +
labs(x = "High expression<-->Low expression", y = "", title = "") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
theme_bw() +
theme(panel.grid = element_blank()) +
theme(panel.border = element_blank()) +
theme(axis.line = element_line(colour = "black"))+
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())+
guides(color=FALSE)
gGsea = ggplot_gtable(ggplot_build(pGsea))
gGene = ggplot_gtable(ggplot_build(pGene))
maxWidth = grid::unit.pmax(gGsea$widths, gGene$widths)
gGsea$widths = as.list(maxWidth)
gGene$widths = as.list(maxWidth)
dev.off()survivalDiscrete
数据导入
数据处理
var="Stage" #用于生存分析的变量
rt=rt[,c("futime","fustat",var)]
colnames(rt)[3]="Type"
groupNum=length(levels(factor(rt[,"Type"])))
library(survival)
library(survminer)
#比较组间生存差异的P值
diff=survdiff(Surv(futime, fustat) ~Type,data = rt)
pValue=1-pchisq(diff$chisq,df=(groupNum-1)) #df自由度
if(pValue<0.001){
pValue="p<0.001"
}else{
pValue=paste0("p=",sprintf(".03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ Type, data = rt)开始绘制
surPlot=ggsurvplot(fit,
data=rt,
conf.int=F, #置信区间
pval=pValue,
pval.size=5,
legend.labs=levels(factor(rt[,"Type"])),
legend.title=var,
xlab="Time(years)",
break.time.by = 1,
risk.table.title="",
risk.table=F,
risk.table.height=.25)
pdf(file=paste(dir,"33survivalDiscrete.pdf",sep=""),onefile = FALSE,width = 5,height =4.5)
print(surPlot)
dev.off()survivalContinuous
数据导入
数据处理
var="MATN3" #用于生存分析的变量
rt=rt[,c("futime","fustat",var)]
library(survival)
#根据中位值,把样品分为两组
group=ifelse(rt[,3]>median(rt[,3]),"High","Low")
diff=survdiff(Surv(futime, fustat) ~group,data = rt)
pValue=1-pchisq(diff$chisq,df=1)
if(pValue<0.001){
pValue="p<0.001"
}else{
pValue=paste0("p=",sprintf("%.03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ group, data = rt) #剩余数目开始绘制
library(survminer)
#绘制
surPlot=ggsurvplot(fit,
data=rt,
conf.int=TRUE,
pval=pValue,
pval.size=5,
legend.labs=c("High", "Low"),
legend.title=var,
xlab="Time(years)",
break.time.by = 1,
risk.table.title="",
palette=c("red", "blue"),
risk.table=T,
risk.table.height=.25)
pdf(file=paste(dir,"34survivalContinuous.pdf",sep=""),onefile = FALSE,width = 6,height =5)
print(surPlot)
dev.off()survivalCutoff
数据导入
数据处理
var="MATN3"
rt=rt[,c("futime","fustat",var)]
colnames(rt)=c("futime","fustat","var")
library(survival)
library(survminer)
#获取最优cutoff
res.cut=surv_cutpoint(rt, time = "futime", event = "fustat",variables =c("var"))
res.cut
res.cat=surv_categorize(res.cut)
fit=survfit(Surv(futime, fustat) ~var, data = res.cat)
#比较高低表达生存差异
diff=survdiff(Surv(futime, fustat) ~var,data =res.cat)
pValue=1-pchisq(diff$chisq,df=1)
if(pValue<0.001){
pValue="p<0.001"
}else{
pValue=paste0("p=",sprintf("%.03f",pValue))
}开始绘制
surPlot=ggsurvplot(fit,
data=res.cat,
conf.int=TRUE,
pval=pValue,
pval.size=5,
legend.labs=c("High", "Low"),
legend.title=var,
xlab="Time(years)",
break.time.by = 1,
risk.table.title="",
palette=c("red", "blue"),
risk.table=T,
risk.table.height=.25)
pdf(file=paste(dir,"35survivalCutoff.pdf",sep=""), onefile = FALSE,width = 6,height =5)
print(surPlot)
dev.off()survival2vars
数据导入
数据处理
library(survival)
library(survminer)
var1="VCAN" #用于生存分析的变量1
var2="CXCR4" #用于生存分析的变量2
#根据基因表达,对数据分组
a=ifelse(rt[,var1]<median(rt[,var1]),paste0(var1," low"),paste0(var1," high"))
b=ifelse(rt[,var2]<median(rt[,var2]),paste0(var2," low"),paste0(var2," high"))
Type=paste(a,"+",b)
rt=cbind(rt,Type)
#生存差异统计
length=length(levels(factor(Type)))
diff=survdiff(Surv(futime, fustat) ~Type,data = rt)
pValue=1-pchisq(diff$chisq,df=length-1)
if(pValue<0.001){
pValue="p<0.001"
}else{
pValue=paste0("p=",sprintf("%.03f",pValue))
}
fit <- survfit(Surv(futime, fustat) ~ Type, data = rt)开始绘制
surPlot=ggsurvplot(fit,
data=rt,
conf.int=F, #不含置信区间
pval=pValue,
pval.size=5,
legend.labs=levels(factor(rt[,"Type"])),
legend.title="Type",
#font.legend=6,
legend = "right",
xlab="Time(years)",
break.time.by = 1,
risk.table.title="",
risk.table=F, #风险表格
risk.table.height=.25)
pdf(file=paste(dir,"36survival2vars.pdf",sep=""),onefile = FALSE,
width = 7.5,height =5)
print(surPlot)
dev.off()forest
数据导入
数据处理
开始绘图
#出图格式
pdf(file=paste(dir,"37forest.pdf",sep=""), width = 6, height =4.5)
n=nrow(rt)
nRow=n+1
ylim=c(1,nRow)
layout(matrix(c(1,2),nc=2),width=c(3,2))
#森林图左边的基因信息
xlim = c(0,3)
par(mar=c(4,2,1.5,1.5))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
text.cex=0.8
text(0,n:1,gene,adj=0,cex=text.cex)
text(1.5-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(1.5-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
text(3,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)
#绘制森林图
par(mar=c(4,1,1.5,1),mgp=c(2,0.5,0))
xlim = c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.03,col="darkblue",lwd=2.5)
abline(v=1,col="black",lty=2,lwd=2)
boxcolor = ifelse(as.numeric(hr) > 1, 'red', 'blue')
points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.3)
axis(1)
dev.off()Nomo
数据导入
数据处理
library(rms)
dd <- datadist(rt)
options(datadist="dd")
#生成函数
f <- cph(Surv(futime, fustat) ~Age+Gender+Grade+T+M+N+VCAN, x=T, y=T, surv=T,
data=rt, time.inc=1)
surv <- Survival(f)
#建立nomogram
nom <- nomogram(f, fun=list(function(x) surv(1, x),
function(x) surv(2, x),
function(x) surv(3, x)),
lp=F, funlabel=c("1-year survival", "2-year survival", "3-year survival"),
maxscale=100,
fun.at=c(0.99, 0.9, 0.8, 0.7, 0.5, 0.3,0.1,0.01)) 开始绘图
pdf(file=paste(dir,"38Nomo-1.pdf",sep=""), height=7,width=8.5)
plot(nom)
dev.off()
#calibration curve
time=3 #预测三年calibration
f <- cph(Surv(futime, fustat) ~ Age+Gender+Grade+T+M+N+VCAN, x=T, y=T, surv=T,
data=rt, time.inc=time)
cal <- calibrate(f, cmethod="KM", method="boot",
u=time, m=100, B=1000) #m样品数目1/3
pdf(file=paste(dir,"38Nomo-2-cal.pdf",sep=""),height=6,width=7)
plot(cal,
xlab="Nomogram-Predicted Probability of 3-Year OS",
ylab="Actual 3-Year OS(proportion)",col="red",sub=F)
dev.off()ROC
数据导入
multiVarROC
数据导入
开始绘图
library(pROC)
y=colnames(rt)[1]
#定义颜色
bioCol=c("red","blue","green","yellow")
if(ncol(rt)>4){
bioCol=rainbow(ncol(rt))}
#绘制
pdf(file=paste(dir,"40multiVarROC.pdf",sep=""),width=5,height=5)
roc1=roc(rt[,y], as.vector(rt[,2]))
aucText=c( paste0(colnames(rt)[2],", AUC=",sprintf("%0.3f",auc(roc1))) )
plot(roc1, col=bioCol[1])
for(i in 3:ncol(rt)){
roc1=roc(rt[,y], as.vector(rt[,i]))
lines(roc1, col=bioCol[i-1])
aucText=c(aucText, paste0(colnames(rt)[i],", AUC=",sprintf("%0.3f",auc(roc1))) )
}
legend("bottomright", aucText,lwd=2,bty="n",col=bioCol[1:(ncol(rt)-1)])
dev.off()timeROC
数据导入
开始绘制
library(survival)
library(survminer)
library(timeROC)
var="score" #需要的变量
ROC_rt=timeROC(T=rt$futime, delta=rt$fustat,
marker=rt[,var], cause=1,
weighting='aalen',
times=c(1), ROC=TRUE) #预测时间1年
pdf(file=paste(dir,"41timeROC.pdf",sep=""),width=5, height=5)
plot(ROC_rt, time=1, col='red', title=FALSE, lwd=2)
text(0.65,0.45,paste0('AUC = ',sprintf("%.03f",ROC_rt$AUC[2])),cex=1.2)
dev.off()multiTimeROC
数据导入
开始绘制
library(survival)
library(survminer)
library(timeROC)
var="score"
#绘制
ROC_rt=timeROC(T=rt$futime, delta=rt$fustat,
marker=rt[,var], cause=1,
weighting='aalen',
times=c(1,2,3), ROC=TRUE)
pdf(file=paste(dir,"42multiTimeROC.pdf",sep=""),width=5,height=5)
plot(ROC_rt,time=1,col='green',title=FALSE,lwd=2)
plot(ROC_rt,time=2,col='blue',add=TRUE,title=FALSE,lwd=2)
plot(ROC_rt,time=3,col='red',add=TRUE,title=FALSE,lwd=2)
legend('bottomright',
c(paste0('AUC at 1 years: ',sprintf("%.03f",ROC_rt$AUC[1])),
paste0('AUC at 2 years: ',sprintf("%.03f",ROC_rt$AUC[2])),
paste0('AUC at 3 years: ',sprintf("%.03f",ROC_rt$AUC[3]))),
col=c("green",'blue','red'),lwd=2,bty = 'n')
dev.off()multiVarTimeROC
数据导入
开始绘制
library(survival)
library(survminer)
library(timeROC)
#颜色
bioCol=rainbow(ncol(rt)-2)
#绘制
aucText=c()
pdf(file=paste(dir,"43multiVarTimeROC.pdf",sep=""),width=6,height=6)
i=3
ROC_rt=timeROC(T=rt$futime,delta=rt$fustat,marker=rt[,i],cause=1,weighting='aalen',times=c(1),ROC=TRUE)
plot(ROC_rt,time=1,col=bioCol[i-2],title=FALSE,lwd=2)
aucText=c(paste0(colnames(rt)[i],", AUC=",sprintf("%.3f",ROC_rt$AUC[2])))
abline(0,1)
for(i in 4:ncol(rt)){
ROC_rt=timeROC(T=rt$futime,delta=rt$fustat,marker=rt[,i],cause=1,weighting='aalen',times=c(1),ROC=TRUE)
plot(ROC_rt,time=1,col=bioCol[i-2],title=FALSE,lwd=2,add=TRUE)
aucText=c(aucText,paste0(colnames(rt)[i],", AUC=",sprintf("%.3f",ROC_rt$AUC[2])))
}
legend("bottomright", aucText,lwd=2,bty="n",col=bioCol[1:(ncol(rt)-2)])
dev.off()PCA
数据导入
数据处理
data=rt[,c(2:ncol(rt))]
Type=rt[,1]
var=colnames(rt)[1]
#PCA分析
data.pca=prcomp(data, scale. = TRUE)
pcaPredict=predict(data.pca)
PCA = data.frame(PC1=pcaPredict[,1], PC2=pcaPredict[,2], Type=Type)
#定义颜色
col=c("red","blue")
if(length(levels(factor(Type)))>2){
col=rainbow(length(levels(factor(Type))))}开始绘图
library(ggplot2)
#绘制PCA
pdf(file=paste(dir,"44PCA.pdf",sep=""),height=4.5, width=5.5) #???????????ļ?
p=ggplot(data = PCA, aes(PC1, PC2)) + geom_point(aes(color = Type)) +
scale_colour_manual(name=var, values =col)+
theme_bw()+
theme(plot.margin=unit(rep(1.5,4),'lines'))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
print(p)
dev.off()PCA3d
数据导入
数据处理
开始绘图
library(scatterplot3d)
pdf(file=paste(dir,"45PCA3d.pdf",sep=""), height=5, width=6)
par(oma=c(0.5,0.5,0.5,0.5))
s3d=scatterplot3d(pcaPredict[,1:3], pch = 16, color=col)
legend("top", legend =group,pch = 16, inset = -0.2, box.col="white",xpd = TRUE, horiz = TRUE,col=bioCol[1:length(group)])
dev.off()circos
genome
数据导入
数据处理
开始绘制
ymin=quantile(rt[,4],0.1)
ymax=quantile(rt[,4],0.9)*2
pdf(file=paste(dir,"47genome.pdf",sep=""),width=10, height=7)
kp=plotKaryotype("hg38", plot.type=1)
kpDataBackground(kp, data.panel=1, color="white")
kpPoints(kp, data=data.points, pch=".", col="red", ymin=ymin, ymax=ymax, cex=6)
kpAddBaseNumbers(kp, tick.dist=10000000, minor.tick.dist=1000000)
dev.off()ggtree
加载R包
数据导入
#设置路径
path <-paste(dir,"48ggtree/",sep="")
#设置工作目录
setwd(path)
#读取数据
treFile="input.tre" #进化树文件
groupFile="group.txt" #树枝分类
outFile="tree.pdf" #输出
#读取属性文件,把属性信息保存到list
cls=list()
rt=read.table(groupFile,sep="\t",header=T)
for(i in 1:nrow(rt)){
otu=as.character(rt[i,1])
phylum=as.character(rt[i,2])
cls[[phylum]]=c(cls[[phylum]], otu)
}
phylumNames=names(cls)
phylumNum=length(phylumNames)
#读取进化树文件,和属性文件合并
tree=read.tree(treFile)
tree=groupOTU(tree, cls)开始绘制
#绘制
pdf(file=paste(dir,"48ggtree.pdf",sep=""),width=7, height=7)
ggtree(tree,
layout="circular",
ladderize = F,
branch.length="none",
aes(color=group)) +
scale_color_manual(values=c(rainbow_hcl(phylumNum+1)),
breaks=phylumNames,
labels=phylumNames ) +
theme(legend.position="right") +
geom_text(aes(label=paste(" ",label,sep=""),
angle=angle+45),
size=2)
dev.off()waterfall
数据导入
gganatogram
数据导入
#设置路径
path <-paste(dir,"50gganatogram/",sep="")
#设置工作目录
setwd(path)
#source("https://neuroconductor.org/neurocLite.R")
#neuro_install("gganatogram")
#install.packages("devtools")
#library(devtools)
#devtools::install_github("jespermaag/gganatogram")
#读取数据
rt=read.table("male.txt" , header=T, sep="\t", check.names=F, stringsAsFactors=F) 开始绘制
gender="male"
library(gganatogram)
library(dplyr)
library(viridis)
library(gridExtra)
pdf(file=paste(dir,"50gganatogram.pdf",sep=""), width=8,height=6)
gganatogram(data=rt, fillOutline='white',
organism='human',
sex=gender, fill="value")+
theme_void() +
scale_fill_gradient2(low = "green", mid="black", high = "red",midpoint = 3)
dev.off()更多课程
敬请期待